

# Load the merged recall data; the code below expects it to provide `Data`
load("Recalls_merged_2019.RData")
Data$kenteken <- as.character(Data$kenteken)

# Remove duplicated plates (2284 according to the original note).
# Every row of a plate that occurs more than once is dropped, not only the
# repeated occurrences.
Data$dupl <- duplicated(Data$kenteken)
kentekendupl <- subset(Data, dupl, select = kenteken)
Data <- subset(Data, !(kenteken %in% kentekendupl$kenteken))
Data$dupl <- NULL



# Counting helper: one per car, summed per type-variant later in the script
Data$num <- 1

# rec: car has a recall reference AND the recall was published on or after
# 2017-11-01. Rows where the condition is NA keep the default 0.
Data$rec <- 0
Data$rec[with(Data, !is.na(referentiecoderdw) & publicatiedatumrdw >= "2017-11-01")] <- 1

# rec_bef: car has a recall reference AND the recall was published before
# 2017-11-01
Data$rec_bef <- 0
Data$rec_bef[with(Data, !is.na(referentiecoderdw) & publicatiedatumrdw < "2017-11-01")] <- 1


# Per type-approval number and variant, sum up:
#   num     - all cars
#   rec     - cars recalled from Nov 2017 onwards
#   rec_bef - cars recalled before Nov 2017
Temp <- aggregate(
  cbind(num, rec, rec_bef) ~ typegoedkeuringsnummer + variant,
  data = Data,
  FUN = "sum"
)

# A variant is usable ("ok") when all of the following hold:
#   1) the type approval code is not empty
#   2) at least one car was recalled from Nov 2017 onwards
#   3) no car was recalled before Nov 2017
#   4) not every car was recalled (total cars > recalled cars)
Temp$ok <- with(
  Temp,
  typegoedkeuringsnummer != "" & rec > 0 & rec_bef == 0 & num > rec
)
table(Temp$ok)
# Original run: 3085 variants with recalls, within-variant variation and no
# cars with earlier recalls



# Build a "<type approval number>_<variant>" key, once for the variants
# flagged ok in Temp and once for every car in Data
varvariant <- paste(
  Temp$typegoedkeuringsnummer[Temp$ok],
  Temp$variant[Temp$ok],
  sep = "_"
)
Data$typevar <- paste(Data$typegoedkeuringsnummer, Data$variant, sep = "_")

# Does the car belong to a variant with sufficient variation in recalls?
Data$var_type_var <- Data$typevar %in% varvariant

table(Data$var_type_var)
# Original run: about 606700 cars

table(Data$var_type_var & Data$rec == 1)
# Original run: of which 333675 (55.00%) are recalled

# Remove objects and helper columns that are no longer needed
rm(varvariant, Temp, kentekendupl)
# The original only nulled `typevarver`, a column this script never creates,
# so the `typevar` key built above was never dropped. Drop the real helper;
# the original line is kept in case the loaded data carries such a column.
Data$typevar <- NULL
Data$typevarver <- NULL
Data$num <- NULL


# Keep only the cars whose variant has sufficient variation in recalls
Split <- subset(Data, var_type_var)
rm(Data)

# Monthly sequence, first day of each month, Nov 2017 through Mar 2019
months <- seq(as.Date("2017/11/1"), as.Date("2019/03/01"), by = "months")

# Base panel: one row for every combination of plate and month
Panel_var <- setNames(
  expand.grid(Split$kenteken, months),
  c("kenteken", "date")
)
rm(months)


# Car characteristics plus the two first-registration dates; the dates are
# moved to the first day of their month so they compare with the panel months
Temp <- subset(
  Split,
  select = c("kenteken", "typegoedkeuringsnummer", "variant", "uitvoering",
             "datumeersteafgiftenederland", "datumeerstetoelating")
)
day(Temp$datumeersteafgiftenederland) <- 1
day(Temp$datumeerstetoelating) <- 1
# Attach them to every panel row of the same plate
Panel_var <- merge(Panel_var, Temp, by = "kenteken", all.x = TRUE)
# Per the original note, new cars were already removed in step 1d
rm(Temp)

# Sort by plate, then by month
Panel_var <- Panel_var[with(Panel_var, order(kenteken, date)), ]


# Drop panel months that fall after the car was exported abroad
Temp <- subset(Split, select = c("kenteken", "exported_last"))
# Shift the export date back one month and move it to the first of the month;
# per the original note, the result is the month the change took place rather
# than the month after
Temp$exported_last <- as.Date(Temp$exported_last, format = "%Y-%m-%d") %m-% months(1)
day(Temp$exported_last) <- 1
Panel_var <- merge(Panel_var, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
# Keep a row when the car has no export date, or when the panel month is not
# later than the (shifted) export month
Panel_var <- subset(Panel_var, is.na(exported_last) | date <= exported_last)



# Get resale dates
Temp <- subset(Split, select = c("kenteken", "datumtenaamstelling_all"))
Panel_var <- merge(Panel_var, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
Panel_var$resale <- 0
# Mark whether the car is sold in a given month of the panel.
# One regex per row: "<month>/<year>" of the panel date, searched in the car's
# transfer-date string.
# NOTE(review): assumes datumtenaamstelling_all holds "month/year" tokens,
# with or without a leading zero - confirm against the step that builds it.
# The look-behind stops a one-digit month from matching inside a two-digit
# month: as a plain substring "1/2018" also matched "11/2018" and "2/2018"
# matched "12/2018". "0?" still accepts a zero-padded month such as "01/2018".
i <- str_detect(
  Panel_var$datumtenaamstelling_all,
  paste0("(?<![0-9])0?", month(Panel_var$date), "/", year(Panel_var$date))
)
# which() drops NA matches (missing transfer string); those rows keep resale 0
Panel_var$resale[which(i)] <- 1



# Attach the recall reference, the recall date and the recall-fixing date,
# then move both dates to the first of their month
Temp <- subset(Split, select = c(kenteken, referentiecoderdw, recall_new, recall_fixed))
Panel_var <- merge(Panel_var, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
day(Panel_var$recall_new) <- 1
day(Panel_var$recall_fixed) <- 1

# rec_new: 1 only in the month of the recall
Panel_var$rec_new <- 0
i <- !is.na(Panel_var$recall_new) & Panel_var$recall_new == Panel_var$date
Panel_var$rec_new[i] <- 1

# rec_new_t: 1 from the month of the recall onwards
Panel_var$rec_new_t <- 0
i <- !is.na(Panel_var$recall_new) & Panel_var$recall_new <= Panel_var$date
Panel_var$rec_new_t[i] <- 1


# Months elapsed since the recall, counting the recall month itself as 1
Panel_var$rec_new_dist <- with(
  Panel_var,
  1 + (year(date) * 12 + month(date)) - (year(recall_new) * 12 + month(recall_new))
)
# rec_new_dist_both keeps negative values (months before the recall);
# rec_new_dist is floored at 0. Cars without a recall date get 0 in both.
Panel_var$rec_new_dist_both <- Panel_var$rec_new_dist
Panel_var$rec_new_dist[with(Panel_var, is.na(rec_new_dist) | rec_new_dist < 0)] <- 0
Panel_var$rec_new_dist_both[is.na(Panel_var$rec_new_dist_both)] <- 0

# Distance in three-month groups:
#   0 = no distance, 1 = months 1-3, 2 = 4-6, 3 = 7-9, 4 = 10-12, 5 = 13+
# findInterval() counts how many lower bounds each whole-month distance has
# reached, which is exactly the group number.
Panel_var$rec_new_dist_cat <- as.numeric(
  findInterval(Panel_var$rec_new_dist, c(1, 4, 7, 10, 13))
)
table(Panel_var$rec_new_dist_cat)

# rec_fixed: 1 only in the month the recall was fixed
Panel_var$rec_fixed <- 0
i <- !is.na(Panel_var$recall_fixed) & Panel_var$recall_fixed == Panel_var$date
Panel_var$rec_fixed[i] <- 1

# rec_fixed_t: 1 from the month of fixing onwards
Panel_var$rec_fixed_t <- 0
i <- !is.na(Panel_var$recall_fixed) & Panel_var$recall_fixed <= Panel_var$date
Panel_var$rec_fixed_t[i] <- 1


# Numeric id for each type-variant: the factor code of the
# "<type approval number>_<variant>" key
Panel_var$id_var <- as.numeric(as.factor(
  paste(Panel_var$typegoedkeuringsnummer, Panel_var$variant, sep = "_")
))

# Sort by type-variant id, then plate, then month.
# The original sorted on `Panel_var$id`, which only resolved to `id_var`
# through `$` partial matching; the full column name is used so the sort does
# not depend on that.
Panel_var <- Panel_var[order(Panel_var$id_var, Panel_var$kenteken, Panel_var$date), ]

# Store the plate as plain character
Panel_var$kenteken <- as.character(Panel_var$kenteken)

# Drop panel months that precede the car's first registration in NL.
# The table shows how many rows are affected, including NA comparisons;
# subset() also drops the rows where the comparison is NA.
i <- Panel_var$datumeersteafgiftenederland > Panel_var$date
table(i, useNA = "always")
Panel_var <- subset(Panel_var, !i)
rm(i)

# Age of the car in completed years: whole months between the panel month and
# datumeerstetoelating, divided by 12 and rounded down
Panel_var$age_car <- with(
  Panel_var,
  floor(
    ((month(date) + year(date) * 12) -
       (month(datumeerstetoelating) + year(datumeerstetoelating) * 12)) / 12
  )
)



# Save the panel in R format
save(Panel_var, file = "Panel_var.RData")


